% Main Script: run_empirical_1.m
%   Description:
%       This script produces the estimation and inference results for the actual data set with the
%       bi-annual dummies as in Wolfers (2006), where the experiment is based on the seminal studies
%       of the effects of unilateral divorce law reforms on the US divorce rates by Friedberg (1998) 
%       and Wolfers (2006), subsequently revisited by Kim and Oka (2014) and Moon and Weidner (2015)
%       in the context of interactive fixed effects models
%   Output:
%       Table 5: LS and debiased estimates and 95% CIs for beta

% ========================================
% Data from the studies of the effects of unilateral divorce law reforms on the US divorce rates
% ========================================
% ==============================
% Clean the data
% ==============================
% We first repeat the Table 3 in Kim and Oka (2014) by fixing some coding issues
clear all;
% Load the data, where the original data and code are from Kim and Oka (2014)
DATA  = csvread('DivorceRate1956_1988.csv', 1, 1);
% Construct state ID and drop IN (id == 16) and NM (id == 33)
id_st = DATA(:,1);
ind02  = (id_st ~= 16 & id_st ~= 33);
DATA02 = DATA(ind02,:);
id_st = DATA02(:,1);
year  = DATA02(:,2);
% Dependent variable
Y02 = (DATA02(:,4));
% State population
stpop = DATA02(:,5);
% Dummy variable for Friedberg's unilateral law
d_unilateral = DATA02(:,6);
% Dynamic treatment effects for Friedberg's unilateral law
dyn_unilateral = DATA02(:,7:14);
% sample size
NT = size(DATA02, 1);
T  = max(year) - min(year) + 1;
N  = NT / T;
% dimension of the regressors
K = 8;
% Convert dependend variable and regressors in NT-vector form:
YY=Y02;
XX=dyn_unilateral;
% ==============================
% Transform data into matrix form for later steps
% ==============================
Y = (reshape(YY,[T,N]))';
X = zeros(K,N,T);
for k=1:K
  X(k,:,:) = (reshape(XX(:,k),[T,N]))';
end
% Using only one regressor
D(1,:,:) = squeeze(sum(X));
X = D;
K = 1;

% ========================================
% Estimation using LS_factor
% ========================================
beta0 = zeros(K,1);
% Maximum number of the factors
Rmax = 9;
% Control for standard time effects
lambda_known=ones(N,1);
% Control for standard individual specific effects + time trend + time trend.^2
trend=(1:T)';
f_known=[ones(T,1),trend,trend.^2];
% Project out (generalized within transformation) all known factor loadings
if size(lambda_known,2)>0
    Y=Y - lambda_known * (lambda_known\Y);
    for k=1:K
        xx=squeeze(X(k,:,:));
        X(k,:,:)= xx - lambda_known * (lambda_known\xx);
    end
end
% Project out (generalized within transformation) all known factors
if size(f_known,2)>0
    Y=( Y' - f_known * (f_known\Y') )';
    for k=1:K
        xx=squeeze(X(k,:,:));
        X(k,:,:)= ( xx' - f_known * (f_known\xx') )';
    end
end
% Estimation
for R = 0:Rmax
  [beta,exitflag,lambda,f,Vbeta1,Vbeta2,Vbeta3,bcorr1,bcorr2,bcorr3]=LS_factor2(Y,X,R,lambda_known,f_known,'silent',10^-8,'m1',beta0,30,300,2,2);
  % Parameter estimate
  beta_naiv(:,R+1)=beta;
  % Correcting bias due to cross-sectional heteroscedasticity of errors (bcorr2)
  %                      & time-serial heteroscedasticity and-serial correlation of errors (bcorr3)
  beta_bias_corrected(:,R+1)=beta - bcorr2 - bcorr3;
  % Variance-covariance matrix of beta, assuming homoscedasticity of errors in both dimensions
  std_est_homo(:,R+1)=sqrt(diag(Vbeta1));
  % Variance-covariance matrix of beta, assuming heteroscedasticity of errors in both dimensions
  std_est_hetero(:,R+1)=sqrt(diag(Vbeta2));
end
alpha = 0.05;
beta_LS = beta_bias_corrected(:,2:end);
se_LS = std_est_hetero(:,2:end);
LB_LS = beta_LS - se_LS*norminv(1-alpha/2);
UB_LS = beta_LS + se_LS*norminv(1-alpha/2);

% ========================================
% Estimation using honest weak factor
% ========================================
for R = 1:Rmax
    [beta, bias, se, LB, UB, LB_s, UB_s, A] = honest_weak_factors(Y, X, R);
    bias_1 = bias/R;
    beta_h(:,R) = beta;
    % Standard confidence intervals without adjustment
    LB_h_s(:,R) = LB_s;
    UB_h_s(:,R) = UB_s;
    % Bias-aware confidence intervals as if the number of weak factors being 1 
    LB_h_1(:,R) = beta - bias_1 - se*norminv(1-alpha/2);
    UB_h_1(:,R) = beta + bias_1 + se*norminv(1-alpha/2);
    % Bias-aware confidence intervals with the number of weak factors being R
    LB_h(:,R) = LB;
    UB_h(:,R) = UB;
end

% ========================================
% Output Table 5 as CSV file
% ========================================
if ~isfolder('tables')
    mkdir('tables')
end
% Only report results with up to the number of factors being 6
Rmax = 6;
% Construct array with the results
tab_arr = [beta_LS(1:Rmax); LB_LS(1:Rmax); UB_LS(1:Rmax); ...
    beta_h(1:Rmax); LB_h_s(1:Rmax); UB_h_s(1:Rmax); LB_h_1(1:Rmax); UB_h_1(1:Rmax); LB_h(1:Rmax); UB_h(1:Rmax)];
% Convert array to table
columnLabels = arrayfun(@(x) ['R=' num2str(x)], 1:Rmax, 'UniformOutput', false);
rowLabels = {'LS Estimate', 'LS CI lower bound', 'LS CI upper bound', ...
    'H Estimate', 'H CI lower bound (R_w=0)', 'H CI upper bound (R_w=0)', ...
    'H CI lower bound (R_w=1)', 'H CI upper bound (R_w=1)', ...
    'H CI lower bound (R_w=R)', 'H CI upper bound (R_w=R)'};
tab5  = array2table(tab_arr, 'RowNames', rowLabels, 'VariableNames', columnLabels);
% Output table
writetable(tab5, 'tables/table5.csv', 'WriteRowNames', true);